MVP

Now we’ll go back to CI creation in the normal fashion. We’ll take the ames data from the CIs lab earlier today and regard it now as a sample, we won’t be drawing any smaller samples from within it. This is the usual situation in an analysis: you use all the data available to you!


Task 1.
Load the data again, clean_names(), and re-familiarise yourself with it


Ans 1.

Load libraries:

library(tidyverse)
Registered S3 methods overwritten by 'dbplyr':
  method         from
  print.tbl_lazy     
  print.tbl_sql      
-- Attaching packages --------------------------------------------------------------------------------------- tidyverse 1.3.1 --
√ ggplot2 3.3.5     √ purrr   0.3.4
√ tibble  3.1.6     √ dplyr   1.0.8
√ tidyr   1.2.0     √ stringr 1.4.0
√ readr   2.1.2     √ forcats 0.5.1
-- Conflicts ------------------------------------------------------------------------------------------ tidyverse_conflicts() --
x dplyr::filter() masks stats::filter()
x dplyr::lag()    masks stats::lag()
library(infer)
Warning: package ‘infer’ was built under R version 4.1.3
library(janitor)

Attaching package: ‘janitor’

The following objects are masked from ‘package:stats’:

    chisq.test, fisher.test

Load and explore the data held in ames.csv

ames <- read_csv("data/ames.csv") %>%
  clean_names()
Rows: 2930 Columns: 82
-- Column specification --------------------------------------------------------------------------------------------------------
Delimiter: ","
chr (43): MS.Zoning, Street, Alley, Lot.Shape, Land.Contour, Utilities, Lot.Config, Land.Slope, Neighborhood, Condition.1, C...
dbl (39): Order, PID, MS.SubClass, Lot.Frontage, Lot.Area, Overall.Qual, Overall.Cond, Year.Built, Year.Remod.Add, Mas.Vnr.A...

i Use `spec()` to retrieve the full column specification for this data.
i Specify the column types or set `show_col_types = FALSE` to quiet this message.
glimpse(ames)
Rows: 2,930
Columns: 82
$ order           <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28,~
$ pid             <dbl> 526301100, 526350040, 526351010, 526353030, 527105010, 527105030, 527127150, 527145080, 527146030, 527~
$ ms_sub_class    <dbl> 20, 20, 20, 20, 60, 60, 120, 120, 120, 60, 60, 20, 60, 20, 120, 60, 50, 20, 20, 20, 20, 85, 60, 20, 20~
$ ms_zoning       <chr> "RL", "RH", "RL", "RL", "RL", "RL", "RL", "RL", "RL", "RL", "RL", "RL", "RL", "RL", "RL", "RL", "RL", ~
$ lot_frontage    <dbl> 141, 80, 81, 93, 74, 78, 41, 43, 39, 60, 75, NA, 63, 85, NA, 47, 152, 88, 140, 85, 105, 85, NA, NA, NA~
$ lot_area        <dbl> 31770, 11622, 14267, 11160, 13830, 9978, 4920, 5005, 5389, 7500, 10000, 7980, 8402, 10176, 6820, 53504~
$ street          <chr> "Pave", "Pave", "Pave", "Pave", "Pave", "Pave", "Pave", "Pave", "Pave", "Pave", "Pave", "Pave", "Pave"~
$ alley           <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA~
$ lot_shape       <chr> "IR1", "Reg", "IR1", "Reg", "IR1", "IR1", "Reg", "IR1", "IR1", "Reg", "IR1", "IR1", "IR1", "Reg", "IR1~
$ land_contour    <chr> "Lvl", "Lvl", "Lvl", "Lvl", "Lvl", "Lvl", "Lvl", "HLS", "Lvl", "Lvl", "Lvl", "Lvl", "Lvl", "Lvl", "Lvl~
$ utilities       <chr> "AllPub", "AllPub", "AllPub", "AllPub", "AllPub", "AllPub", "AllPub", "AllPub", "AllPub", "AllPub", "A~
$ lot_config      <chr> "Corner", "Inside", "Corner", "Corner", "Inside", "Inside", "Inside", "Inside", "Inside", "Inside", "C~
$ land_slope      <chr> "Gtl", "Gtl", "Gtl", "Gtl", "Gtl", "Gtl", "Gtl", "Gtl", "Gtl", "Gtl", "Gtl", "Gtl", "Gtl", "Gtl", "Gtl~
$ neighborhood    <chr> "NAmes", "NAmes", "NAmes", "NAmes", "Gilbert", "Gilbert", "StoneBr", "StoneBr", "StoneBr", "Gilbert", ~
$ condition_1     <chr> "Norm", "Feedr", "Norm", "Norm", "Norm", "Norm", "Norm", "Norm", "Norm", "Norm", "Norm", "Norm", "Norm~
$ condition_2     <chr> "Norm", "Norm", "Norm", "Norm", "Norm", "Norm", "Norm", "Norm", "Norm", "Norm", "Norm", "Norm", "Norm"~
$ bldg_type       <chr> "1Fam", "1Fam", "1Fam", "1Fam", "1Fam", "1Fam", "TwnhsE", "TwnhsE", "TwnhsE", "1Fam", "1Fam", "1Fam", ~
$ house_style     <chr> "1Story", "1Story", "1Story", "1Story", "2Story", "2Story", "1Story", "1Story", "1Story", "2Story", "2~
$ overall_qual    <dbl> 6, 5, 6, 7, 5, 6, 8, 8, 8, 7, 6, 6, 6, 7, 8, 8, 8, 9, 4, 6, 6, 7, 7, 6, 5, 5, 4, 4, 7, 6, 5, 6, 6, 6, ~
$ overall_cond    <dbl> 5, 6, 6, 5, 5, 6, 5, 5, 5, 5, 5, 7, 5, 5, 5, 5, 7, 2, 5, 6, 6, 6, 5, 7, 6, 6, 5, 5, 5, 5, 5, 5, 5, 6, ~
$ year_built      <dbl> 1960, 1961, 1958, 1968, 1997, 1998, 2001, 1992, 1995, 1999, 1993, 1992, 1998, 1990, 1985, 2003, 1988, ~
$ year_remod_add  <dbl> 1960, 1961, 1958, 1968, 1998, 1998, 2001, 1992, 1996, 1999, 1994, 2007, 1998, 1990, 1985, 2003, 2005, ~
$ roof_style      <chr> "Hip", "Gable", "Hip", "Hip", "Gable", "Gable", "Gable", "Gable", "Gable", "Gable", "Gable", "Gable", ~
$ roof_matl       <chr> "CompShg", "CompShg", "CompShg", "CompShg", "CompShg", "CompShg", "CompShg", "CompShg", "CompShg", "Co~
$ exterior_1st    <chr> "BrkFace", "VinylSd", "Wd Sdng", "BrkFace", "VinylSd", "VinylSd", "CemntBd", "HdBoard", "CemntBd", "Vi~
$ exterior_2nd    <chr> "Plywood", "VinylSd", "Wd Sdng", "BrkFace", "VinylSd", "VinylSd", "CmentBd", "HdBoard", "CmentBd", "Vi~
$ mas_vnr_type    <chr> "Stone", "None", "BrkFace", "None", "None", "BrkFace", "None", "None", "None", "None", "None", "None",~
$ mas_vnr_area    <dbl> 112, 0, 108, 0, 0, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 603, 0, 350, 0, 119, 480, 81, 0, 180, 0, 0, 0, 0, 0,~
$ exter_qual      <chr> "TA", "TA", "TA", "Gd", "TA", "TA", "Gd", "Gd", "Gd", "TA", "TA", "TA", "TA", "TA", "Gd", "Ex", "Gd", ~
$ exter_cond      <chr> "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "Gd", "TA", "TA", "TA", "TA", "TA", ~
$ foundation      <chr> "CBlock", "CBlock", "CBlock", "CBlock", "PConc", "PConc", "PConc", "PConc", "PConc", "PConc", "PConc",~
$ bsmt_qual       <chr> "TA", "TA", "TA", "TA", "Gd", "TA", "Gd", "Gd", "Gd", "TA", "Gd", "Gd", "Gd", "Gd", "Gd", "Gd", "Gd", ~
$ bsmt_cond       <chr> "Gd", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", ~
$ bsmt_exposure   <chr> "Gd", "No", "No", "No", "No", "No", "Mn", "No", "No", "No", "No", "No", "No", "Gd", "Av", "Gd", "Av", ~
$ bsmt_fin_type_1 <chr> "BLQ", "Rec", "ALQ", "ALQ", "GLQ", "GLQ", "GLQ", "ALQ", "GLQ", "Unf", "Unf", "ALQ", "Unf", "GLQ", "GLQ~
$ bsmt_fin_sf_1   <dbl> 639, 468, 923, 1065, 791, 602, 616, 263, 1180, 0, 0, 935, 0, 637, 368, 1416, 427, 1445, 120, 790, 705,~
$ bsmt_fin_type_2 <chr> "Unf", "LwQ", "Unf", "Unf", "Unf", "Unf", "Unf", "Unf", "Unf", "Unf", "Unf", "Unf", "Unf", "Unf", "BLQ~
$ bsmt_fin_sf_2   <dbl> 0, 144, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1120, 0, 0, 0, 0, 163, 0, 168, 0, 0, 0, 0, 78, 0, 0, 0, 0,~
$ bsmt_unf_sf     <dbl> 441, 270, 406, 1045, 137, 324, 722, 1017, 415, 994, 763, 233, 789, 663, 0, 234, 132, 411, 744, 589, 11~
$ total_bsmt_sf   <dbl> 1080, 882, 1329, 2110, 928, 926, 1338, 1280, 1595, 994, 763, 1168, 789, 1300, 1488, 1650, 559, 1856, 8~
$ heating         <chr> "GasA", "GasA", "GasA", "GasA", "GasA", "GasA", "GasA", "GasA", "GasA", "GasA", "GasA", "GasA", "GasA"~
$ heating_qc      <chr> "Fa", "TA", "TA", "Ex", "Gd", "Ex", "Ex", "Ex", "Ex", "Gd", "Gd", "Ex", "Gd", "Gd", "TA", "Ex", "Gd", ~
$ central_air     <chr> "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y~
$ electrical      <chr> "SBrkr", "SBrkr", "SBrkr", "SBrkr", "SBrkr", "SBrkr", "SBrkr", "SBrkr", "SBrkr", "SBrkr", "SBrkr", "SB~
$ x1st_flr_sf     <dbl> 1656, 896, 1329, 2110, 928, 926, 1338, 1280, 1616, 1028, 763, 1187, 789, 1341, 1502, 1690, 1080, 1856,~
$ x2nd_flr_sf     <dbl> 0, 0, 0, 0, 701, 678, 0, 0, 0, 776, 892, 0, 676, 0, 0, 1589, 672, 0, 0, 0, 0, 0, 860, 0, 0, 0, 0, 0, 0~
$ low_qual_fin_sf <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ~
$ gr_liv_area     <dbl> 1656, 896, 1329, 2110, 1629, 1604, 1338, 1280, 1616, 1804, 1655, 1187, 1465, 1341, 1502, 3279, 1752, 1~
$ bsmt_full_bath  <dbl> 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, ~
$ bsmt_half_bath  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ~
$ full_bath       <dbl> 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 3, 2, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1, 2, 1, 1, 1, 2, 2, ~
$ half_bath       <dbl> 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, ~
$ bedroom_abv_gr  <dbl> 3, 2, 3, 3, 3, 3, 2, 2, 2, 3, 3, 3, 3, 2, 1, 4, 4, 1, 2, 3, 3, 3, 3, 2, 3, 3, 2, 3, 2, 2, 3, 3, 2, 3, ~
$ kitchen_abv_gr  <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ~
$ kitchen_qual    <chr> "TA", "TA", "Gd", "Ex", "TA", "Gd", "Gd", "Gd", "Gd", "Gd", "TA", "TA", "TA", "Gd", "Gd", "Ex", "TA", ~
$ tot_rms_abv_grd <dbl> 7, 5, 6, 8, 6, 7, 6, 5, 5, 7, 7, 6, 7, 5, 4, 12, 8, 8, 4, 7, 7, 6, 7, 5, 6, 6, 4, 5, 5, 5, 6, 6, 4, 6,~
$ functional      <chr> "Typ", "Typ", "Typ", "Typ", "Typ", "Typ", "Typ", "Typ", "Typ", "Typ", "Typ", "Typ", "Typ", "Typ", "Typ~
$ fireplaces      <dbl> 2, 0, 0, 2, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 2, 1, 2, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, ~
$ fireplace_qu    <chr> "Gd", NA, NA, "TA", "TA", "Gd", NA, NA, "TA", "TA", "TA", NA, "Gd", "Po", NA, "Gd", NA, "Ex", NA, "TA"~
$ garage_type     <chr> "Attchd", "Attchd", "Attchd", "Attchd", "Attchd", "Attchd", "Attchd", "Attchd", "Attchd", "Attchd", "A~
$ garage_yr_blt   <dbl> 1960, 1961, 1958, 1968, 1997, 1998, 2001, 1992, 1995, 1999, 1993, 1992, 1998, 1990, 1985, 2003, 1988, ~
$ garage_finish   <chr> "Fin", "Unf", "Unf", "Fin", "Fin", "Fin", "Fin", "RFn", "RFn", "Fin", "Fin", "Fin", "Fin", "Unf", "RFn~
$ garage_cars     <dbl> 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 3, 2, 2, 2, 2, 2, 2, 2, 1, 2, 0, 2, 1, 1, 1, 2, 2, ~
$ garage_area     <dbl> 528, 730, 312, 522, 482, 470, 582, 506, 608, 442, 440, 420, 393, 506, 528, 841, 492, 834, 400, 500, 54~
$ garage_qual     <chr> "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", ~
$ garage_cond     <chr> "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", ~
$ paved_drive     <chr> "P", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y~
$ wood_deck_sf    <dbl> 210, 140, 393, 0, 212, 360, 0, 0, 237, 140, 157, 483, 0, 192, 0, 503, 325, 113, 0, 349, 0, 0, 0, 0, 0,~
$ open_porch_sf   <dbl> 62, 0, 36, 0, 34, 36, 0, 82, 152, 60, 84, 21, 75, 0, 54, 36, 12, 0, 0, 0, 122, 120, 96, 0, 0, 85, 0, 0~
$ enclosed_porch  <dbl> 0, 0, 0, 0, 0, 0, 170, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 184, 0, 0, 0, 0, 0, 0, 0,~
$ x3ssn_porch     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ~
$ screen_porch    <dbl> 0, 120, 0, 0, 0, 0, 0, 144, 0, 0, 0, 0, 0, 0, 140, 210, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
$ pool_area       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ~
$ pool_qc         <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA~
$ fence           <chr> NA, "MnPrv", NA, NA, "MnPrv", NA, NA, NA, NA, NA, NA, "GdPrv", NA, NA, NA, NA, NA, NA, NA, "MnPrv", "M~
$ misc_feature    <chr> NA, NA, "Gar2", NA, NA, NA, NA, NA, NA, NA, NA, "Shed", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "S~
$ misc_val        <dbl> 0, 0, 12500, 0, 0, 0, 0, 0, 0, 0, 0, 500, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 700, 0, 0, 0, 0, 0, 0, 0, 0~
$ mo_sold         <dbl> 5, 6, 6, 4, 3, 6, 4, 1, 3, 6, 4, 3, 5, 2, 6, 6, 6, 6, 6, 2, 1, 1, 1, 3, 4, 7, 4, 4, 6, 2, 3, 3, 7, 6, ~
$ yr_sold         <dbl> 2010, 2010, 2010, 2010, 2010, 2010, 2010, 2010, 2010, 2010, 2010, 2010, 2010, 2010, 2010, 2010, 2010, ~
$ sale_type       <chr> "WD", "WD", "WD", "WD", "WD", "WD", "WD", "WD", "WD", "WD", "WD", "WD", "WD", "WD", "WD", "WD", "WD", ~
$ sale_condition  <chr> "Normal", "Normal", "Normal", "Normal", "Normal", "Normal", "Normal", "Normal", "Normal", "Normal", "N~
$ sale_price      <dbl> 215000, 105000, 172000, 244000, 189900, 195500, 213500, 191500, 236500, 189000, 175900, 185000, 180400~

Check data for NA values

ames %>%
  summarise(across(.cols = everything(),
                   .fns = ~sum(is.na(.x))))
NA


Task 2.
Investigate the distribution of lot_area. Is the distribution roughly normal? If not, what problems do you find?


Ans 2.

ames %>%
  ggplot(aes(x = lot_area)) +
  geom_histogram(col = "white", fill = "steel blue", alpha = 0.7, binwidth = 2000)

The distribution is not normal. There’s a significant right-skew.


Task 3.
Compute and visualise a bootstrap sampling distribution for the mean(lot_area) of the sold houses.


Ans 3.

bootstrap_sample <- ames %>%
  rep_sample_n(size = nrow(ames), replace = TRUE, reps = 1000) %>% 
  summarise(
    mean_lot_area = mean(lot_area)
  )

bootstrap_sample 
bootstrap_sample %>% 
  ggplot(aes(x = mean_lot_area)) +
  geom_histogram(col = "white", fill = "steel blue", alpha = 0.7)
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

summary_sample <- bootstrap_sample %>%
  summarise(mean_lot_area = mean(mean_lot_area)) 

summary_sample


Task 4.
Use your bootstrap distribution to calculate a \(95\%\) CI for mean(lot_area), and visualise it on the distribution


Ans 4.

# specify that we want to look at tenure variable, 1000 reps, and want to calculate the mean
infer_resample <- bootstrap_sample %>%
  specify(response = mean_lot_area) %>%
  generate(reps = 1000, type = "bootstrap") %>%
  calculate(stat = "mean")

infer_resample
Response: mean_lot_area (numeric)
infer_ci_95 <- infer_resample %>% 
  get_confidence_interval(level = 0.95, type = "percentile")

infer_ci_95
infer_resample %>%
  visualise(bin = 20) +
  shade_confidence_interval(endpoints = infer_ci_95)


Task 5.
You would like to know the mean(lot_area) of the sold houses with higher confidence. Calculate the \(99\%\) CI for this variable (you can re-use your bootstrap distribution from above). Is it narrower or broader than the \(95\%\) CI? Does that make sense?


Ans 5.

infer_ci_99 <- infer_resample %>% 
  get_confidence_interval(level = 0.99, type = "percentile")

infer_ci_99

The \(99\%\) CI is wider than the \(95\%\) CI. The bounds have moved away from the mean lot area.


Task 6.
Calculate the point estimate of the mean(lot_area)


Ans 6.

sample_200 <- ames %>%
  rep_sample_n(size = 200, reps = 1)

sample_200

summary_sample_200 <- sample_200 %>%
  ungroup() %>%
  summarise(mean_lot_area = mean(lot_area)) 

summary_sample_200

Extension


Task 1.
Calculate a point estimate and \(95\%\) CI for the proportion of houses in the data built before 1920. Does the number of reps you use matter? [Investigate reps from \(200\) up to \(50000\), memory of your laptop permitting].

Ans.

sample_year_built <- ames %>%
  filter(year_built < 1920) %>% 
  rep_sample_n(size = 100, reps = 1)

sample_year_built

summary_sample_year_built <- sample_year_built %>%
  ungroup() %>%
  summarise(
    mean_lot_area = mean(lot_area),
    lower_bound = quantile(lot_area, probs = 0.025),
    upper_bound = quantile(lot_area, probs = 0.975),
    ci_width = upper_bound - lower_bound
    ) 

summary_sample_year_built
sample_year_built <- ames %>%
  filter(year_built < 1920) %>% 
  rep_sample_n(size = 100, reps = 200)

sample_year_built

summary_sample_year_built <- sample_year_built %>%
  ungroup() %>%
  summarise(
    mean_lot_area = mean(lot_area),
    lower_bound = quantile(lot_area, probs = 0.025),
    upper_bound = quantile(lot_area, probs = 0.975),
    ci_width = upper_bound - lower_bound
    ) 

summary_sample_year_built
sample_year_built <- ames %>%
  filter(year_built < 1920) %>% 
  rep_sample_n(size = 100, reps = 1000)

sample_year_built

summary_sample_year_built <- sample_year_built %>%
  ungroup() %>%
  summarise(
    mean_lot_area = mean(lot_area),
    lower_bound = quantile(lot_area, probs = 0.025),
    upper_bound = quantile(lot_area, probs = 0.975),
    ci_width = upper_bound - lower_bound
    ) 

summary_sample_year_built
sample_year_built <- ames %>%
  filter(year_built < 1920) %>% 
  rep_sample_n(size = 100, reps = 2000)

sample_year_built

summary_sample_year_built <- sample_year_built %>%
  ungroup() %>%
  summarise(
    mean_lot_area = mean(lot_area),
    lower_bound = quantile(lot_area, probs = 0.025),
    upper_bound = quantile(lot_area, probs = 0.975),
    ci_width = upper_bound - lower_bound
    ) 

summary_sample_year_built
sample_year_built <- ames %>%
  filter(year_built < 1920) %>% 
  rep_sample_n(size = 100, reps = 5000)

sample_year_built

summary_sample_year_built <- sample_year_built %>%
  ungroup() %>%
  summarise(
    mean_lot_area = mean(lot_area),
    lower_bound = quantile(lot_area, probs = 0.025),
    upper_bound = quantile(lot_area, probs = 0.975),
    ci_width = upper_bound - lower_bound
    ) 

summary_sample_year_built
sample_year_built <- ames %>%
  filter(year_built < 1920) %>% 
  rep_sample_n(size = 100, reps = 10000)

sample_year_built

summary_sample_year_built <- sample_year_built %>%
  ungroup() %>%
  summarise(
    mean_lot_area = mean(lot_area),
    lower_bound = quantile(lot_area, probs = 0.025),
    upper_bound = quantile(lot_area, probs = 0.975),
    ci_width = upper_bound - lower_bound
    ) 

summary_sample_year_built
sample_year_built <- ames %>%
  filter(year_built < 1920) %>% 
  rep_sample_n(size = 100, reps = 50000)

sample_year_built

summary_sample_year_built <- sample_year_built %>%
  ungroup() %>%
  summarise(
    mean_lot_area = mean(lot_area),
    lower_bound = quantile(lot_area, probs = 0.025),
    upper_bound = quantile(lot_area, probs = 0.975),
    ci_width = upper_bound - lower_bound
    ) 

summary_sample_year_built

For more than 200 repetitions, there’s very little change in the mean_lot_area calculation and no change observed in the CI calculation.

LS0tDQp0aXRsZTogIkhvbWV3b3JrIC0gQ29uZmlkZW5jZSBJbnRlcnZhbHMiDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQoNCjxocj4NCg0KIyBNVlANCg0KTm93IHdlJ2xsIGdvIGJhY2sgdG8gQ0kgY3JlYXRpb24gaW4gdGhlIG5vcm1hbCBmYXNoaW9uLiBXZSdsbCB0YWtlIHRoZSBgYW1lc2AgZGF0YSBmcm9tIHRoZSBDSXMgbGFiIGVhcmxpZXIgdG9kYXkgYW5kIHJlZ2FyZCBpdCBub3cgYXMgYSAqKnNhbXBsZSoqLCB3ZSB3b24ndCBiZSBkcmF3aW5nIGFueSBzbWFsbGVyIHNhbXBsZXMgZnJvbSB3aXRoaW4gaXQuIFRoaXMgaXMgdGhlIHVzdWFsIHNpdHVhdGlvbiBpbiBhbiBhbmFseXNpczogeW91IHVzZSBhbGwgdGhlIGRhdGEgYXZhaWxhYmxlIHRvIHlvdSENCg0KPGJyPg0KDQoqKlRhc2sgMS4qKiAgDQpMb2FkIHRoZSBkYXRhIGFnYWluLCBgY2xlYW5fbmFtZXMoKWAsIGFuZCByZS1mYW1pbGlhcmlzZSB5b3Vyc2VsZiB3aXRoIGl0DQoNCjxicj4NCg0KKipBbnMgMS4gKioNCg0KTG9hZCBsaWJyYXJpZXM6DQpgYGB7cn0NCmxpYnJhcnkodGlkeXZlcnNlKQ0KbGlicmFyeShpbmZlcikNCmxpYnJhcnkoamFuaXRvcikNCmBgYA0KDQpMb2FkIGFuZCBleHBsb3JlIHRoZSBkYXRhIGhlbGQgaW4gYW1lcy5jc3YNCmBgYHtyfQ0KYW1lcyA8LSByZWFkX2NzdigiZGF0YS9hbWVzLmNzdiIpICU+JQ0KICBjbGVhbl9uYW1lcygpDQpgYGANCg0KYGBge3J9DQpnbGltcHNlKGFtZXMpDQpgYGANCg0KDQpDaGVjayBkYXRhICBmb3IgTkEgdmFsdWVzDQpgYGB7cn0NCmFtZXMgJT4lDQogIHN1bW1hcmlzZShhY3Jvc3MoLmNvbHMgPSBldmVyeXRoaW5nKCksDQogICAgICAgICAgICAgICAgICAgLmZucyA9IH5zdW0oaXMubmEoLngpKSkpDQoNCmBgYA0KDQoNCjxicj4NCg0KKipUYXNrIDIuKiogIA0KSW52ZXN0aWdhdGUgdGhlIGRpc3RyaWJ1dGlvbiBvZiBgbG90X2FyZWFgLiBJcyB0aGUgZGlzdHJpYnV0aW9uIHJvdWdobHkgbm9ybWFsPyBJZiBub3QsIHdoYXQgcHJvYmxlbXMgZG8geW91IGZpbmQ/DQoNCjxicj4NCg0KKipBbnMgMi4gKioNCg0KYGBge3J9DQphbWVzICU+JQ0KICBnZ3Bsb3QoYWVzKHggPSBsb3RfYXJlYSkpICsNCiAgZ2VvbV9oaXN0b2dyYW0oY29sID0gIndoaXRlIiwgZmlsbCA9ICJzdGVlbCBibHVlIiwgYWxwaGEgPSAwLjcsIGJpbndpZHRoID0gMjAwMCkNCmBgYA0KDQpUaGUgZGlzdHJpYnV0aW9uIGlzIG5vdCBub3JtYWwuIFRoZXJlJ3MgYSBzaWduaWZpY2FudCByaWdodC1za2V3Lg0KDQo8YnI+DQoNCioqVGFzayAzLioqICANCkNvbXB1dGUgYW5kIHZpc3VhbGlzZSBhIGJvb3RzdHJhcCBzYW1wbGluZyBkaXN0cmlidXRpb24gZm9yIHRoZSBgbWVhbihsb3RfYXJlYSlgIG9mIHRoZSBzb2xkIGhvdXNlcy4NCg0KPGJyPg0KDQoqKkFucyAzLiAqKg0KYGBge3J9DQpib290c3RyYXBfc2FtcGxlIDwtIGFtZXMgJT4lDQogIHJlcF9zYW1wbGVfbihzaXplID0gbnJvdyhhbWVzKSwgcmVwbGFjZSA9IFRSVUUsIHJlcHMgPSAxMDAwKSAlPiUgDQogIHN1bW1hcmlzZSgNCiAgICBtZWFuX2xvdF9hcmVhID0gbWVhbihsb3RfYXJlYSkNCiAgKQ0KDQpib290c3RyYXBfc2FtcGxlIA0KYGBgDQoNCmBgYHtyfQ0KYm9vdHN0cmFwX3NhbXBsZSAlPiUgDQogIGdncGxvdChhZXMoeCA9IG1lYW5fbG90X2FyZWEpKSArDQogIGdlb21faGlzdG9ncmFtKGNvbCA9ICJ3aGl0ZSIsIGZpbGwgPSAic3RlZWwgYmx1ZSIsIGFscGhhID0gMC43KQ0KYGBgDQoNCmBgYHtyfQ0Kc3VtbWFyeV9zYW1wbGUgPC0gYm9vdHN0cmFwX3NhbXBsZSAlPiUNCiAgc3VtbWFyaXNlKG1lYW5fbG90X2FyZWEgPSBtZWFuKG1lYW5fbG90X2FyZWEpKSANCg0Kc3VtbWFyeV9zYW1wbGUNCmBgYA0KDQoNCjxicj4NCg0KKipUYXNrIDQuKiogIA0KVXNlIHlvdXIgYm9vdHN0cmFwIGRpc3RyaWJ1dGlvbiB0byBjYWxjdWxhdGUgYSAkOTVcJSQgQ0kgZm9yIGBtZWFuKGxvdF9hcmVhKWAsIGFuZCB2aXN1YWxpc2UgaXQgb24gdGhlIGRpc3RyaWJ1dGlvbg0KDQo8YnI+DQoNCioqQW5zIDQuICoqDQpgYGB7cn0NCiMgc3BlY2lmeSB0aGF0IHdlIHdhbnQgdG8gbG9vayBhdCB0ZW51cmUgdmFyaWFibGUsIDEwMDAgcmVwcywgYW5kIHdhbnQgdG8gY2FsY3VsYXRlIHRoZSBtZWFuDQppbmZlcl9yZXNhbXBsZSA8LSBib290c3RyYXBfc2FtcGxlICU+JQ0KICBzcGVjaWZ5KHJlc3BvbnNlID0gbWVhbl9sb3RfYXJlYSkgJT4lDQogIGdlbmVyYXRlKHJlcHMgPSAxMDAwLCB0eXBlID0gImJvb3RzdHJhcCIpICU+JQ0KICBjYWxjdWxhdGUoc3RhdCA9ICJtZWFuIikNCg0KaW5mZXJfcmVzYW1wbGUNCmBgYA0KDQpgYGB7cn0NCmluZmVyX2NpXzk1IDwtIGluZmVyX3Jlc2FtcGxlICU+JSANCiAgZ2V0X2NvbmZpZGVuY2VfaW50ZXJ2YWwobGV2ZWwgPSAwLjk1LCB0eXBlID0gInBlcmNlbnRpbGUiKQ0KDQppbmZlcl9jaV85NQ0KYGBgDQoNCmBgYHtyfQ0KaW5mZXJfcmVzYW1wbGUgJT4lDQogIHZpc3VhbGlzZShiaW4gPSAyMCkgKw0KICBzaGFkZV9jb25maWRlbmNlX2ludGVydmFsKGVuZHBvaW50cyA9IGluZmVyX2NpXzk1KQ0KYGBgDQoNCg0KPGJyPg0KDQoqKlRhc2sgNS4qKiAgDQpZb3Ugd291bGQgbGlrZSB0byBrbm93IHRoZSBgbWVhbihsb3RfYXJlYSlgIG9mIHRoZSBzb2xkIGhvdXNlcyB3aXRoIGhpZ2hlciBjb25maWRlbmNlLiBDYWxjdWxhdGUgdGhlICQ5OVwlJCBDSSBmb3IgdGhpcyB2YXJpYWJsZSAoeW91IGNhbiByZS11c2UgeW91ciBib290c3RyYXAgZGlzdHJpYnV0aW9uIGZyb20gYWJvdmUpLiBJcyBpdCBuYXJyb3dlciBvciBicm9hZGVyIHRoYW4gdGhlICQ5NVwlJCBDST8gRG9lcyB0aGF0IG1ha2Ugc2Vuc2U/DQoNCjxicj4NCg0KKipBbnMgNS4gKioNCmBgYHtyfQ0KaW5mZXJfY2lfOTkgPC0gaW5mZXJfcmVzYW1wbGUgJT4lIA0KICBnZXRfY29uZmlkZW5jZV9pbnRlcnZhbChsZXZlbCA9IDAuOTksIHR5cGUgPSAicGVyY2VudGlsZSIpDQoNCmluZmVyX2NpXzk5DQpgYGANCg0KVGhlICQ5OVwlJCBDSSBpcyB3aWRlciB0aGFuIHRoZSAkOTVcJSQgQ0kuIFRoZSBib3VuZHMgaGF2ZSBtb3ZlZCBhd2F5IGZyb20gdGhlIG1lYW4gbG90IGFyZWEuDQoNCjxicj4NCg0KKipUYXNrIDYuKiogIA0KQ2FsY3VsYXRlIHRoZSBwb2ludCBlc3RpbWF0ZSBvZiB0aGUgYG1lYW4obG90X2FyZWEpYA0KDQo8YnI+DQoNCioqQW5zIDYuICoqDQpgYGB7cn0NCnNhbXBsZV8yMDAgPC0gYW1lcyAlPiUNCiAgcmVwX3NhbXBsZV9uKHNpemUgPSAyMDAsIHJlcHMgPSAxKQ0KDQpzYW1wbGVfMjAwDQoNCnN1bW1hcnlfc2FtcGxlXzIwMCA8LSBzYW1wbGVfMjAwICU+JQ0KICB1bmdyb3VwKCkgJT4lDQogIHN1bW1hcmlzZShtZWFuX2xvdF9hcmVhID0gbWVhbihsb3RfYXJlYSkpIA0KDQpzdW1tYXJ5X3NhbXBsZV8yMDANCmBgYA0KDQoNCjxocj4NCg0KIyBFeHRlbnNpb24NCg0KPGJyPg0KDQoqKlRhc2sgMS4qKiAgDQpDYWxjdWxhdGUgYSBwb2ludCBlc3RpbWF0ZSBhbmQgJDk1XCUkIENJIGZvciB0aGUgcHJvcG9ydGlvbiBvZiBob3VzZXMgaW4gdGhlIGRhdGEgYnVpbHQgYmVmb3JlIDE5MjAuICBEb2VzIHRoZSBudW1iZXIgb2YgYHJlcHNgIHlvdSB1c2UgbWF0dGVyPyBbSW52ZXN0aWdhdGUgYHJlcHNgIGZyb20gJDIwMCQgdXAgdG8gJDUwMDAwJCwgbWVtb3J5IG9mIHlvdXIgbGFwdG9wIHBlcm1pdHRpbmddLg0KPGJyPjxicj4NCg0KKipBbnMuICoqDQpgYGB7cn0NCnNhbXBsZV95ZWFyX2J1aWx0IDwtIGFtZXMgJT4lDQogIGZpbHRlcih5ZWFyX2J1aWx0IDwgMTkyMCkgJT4lIA0KICByZXBfc2FtcGxlX24oc2l6ZSA9IDEwMCwgcmVwcyA9IDEpDQoNCnNhbXBsZV95ZWFyX2J1aWx0DQoNCnN1bW1hcnlfc2FtcGxlX3llYXJfYnVpbHQgPC0gc2FtcGxlX3llYXJfYnVpbHQgJT4lDQogIHVuZ3JvdXAoKSAlPiUNCiAgc3VtbWFyaXNlKA0KICAgIG1lYW5fbG90X2FyZWEgPSBtZWFuKGxvdF9hcmVhKSwNCiAgICBsb3dlcl9ib3VuZCA9IHF1YW50aWxlKGxvdF9hcmVhLCBwcm9icyA9IDAuMDI1KSwNCiAgICB1cHBlcl9ib3VuZCA9IHF1YW50aWxlKGxvdF9hcmVhLCBwcm9icyA9IDAuOTc1KSwNCiAgICBjaV93aWR0aCA9IHVwcGVyX2JvdW5kIC0gbG93ZXJfYm91bmQNCiAgICApIA0KDQpzdW1tYXJ5X3NhbXBsZV95ZWFyX2J1aWx0DQpgYGANCg0KDQoNCg0KDQpgYGB7cn0NCnNhbXBsZV95ZWFyX2J1aWx0IDwtIGFtZXMgJT4lDQogIGZpbHRlcih5ZWFyX2J1aWx0IDwgMTkyMCkgJT4lIA0KICByZXBfc2FtcGxlX24oc2l6ZSA9IDEwMCwgcmVwcyA9IDIwMCkNCg0Kc2FtcGxlX3llYXJfYnVpbHQNCg0Kc3VtbWFyeV9zYW1wbGVfeWVhcl9idWlsdCA8LSBzYW1wbGVfeWVhcl9idWlsdCAlPiUNCiAgdW5ncm91cCgpICU+JQ0KICBzdW1tYXJpc2UoDQogICAgbWVhbl9sb3RfYXJlYSA9IG1lYW4obG90X2FyZWEpLA0KICAgIGxvd2VyX2JvdW5kID0gcXVhbnRpbGUobG90X2FyZWEsIHByb2JzID0gMC4wMjUpLA0KICAgIHVwcGVyX2JvdW5kID0gcXVhbnRpbGUobG90X2FyZWEsIHByb2JzID0gMC45NzUpLA0KICAgIGNpX3dpZHRoID0gdXBwZXJfYm91bmQgLSBsb3dlcl9ib3VuZA0KICAgICkgDQoNCnN1bW1hcnlfc2FtcGxlX3llYXJfYnVpbHQNCmBgYA0KDQoNCmBgYHtyfQ0Kc2FtcGxlX3llYXJfYnVpbHQgPC0gYW1lcyAlPiUNCiAgZmlsdGVyKHllYXJfYnVpbHQgPCAxOTIwKSAlPiUgDQogIHJlcF9zYW1wbGVfbihzaXplID0gMTAwLCByZXBzID0gMTAwMCkNCg0Kc2FtcGxlX3llYXJfYnVpbHQNCg0Kc3VtbWFyeV9zYW1wbGVfeWVhcl9idWlsdCA8LSBzYW1wbGVfeWVhcl9idWlsdCAlPiUNCiAgdW5ncm91cCgpICU+JQ0KICBzdW1tYXJpc2UoDQogICAgbWVhbl9sb3RfYXJlYSA9IG1lYW4obG90X2FyZWEpLA0KICAgIGxvd2VyX2JvdW5kID0gcXVhbnRpbGUobG90X2FyZWEsIHByb2JzID0gMC4wMjUpLA0KICAgIHVwcGVyX2JvdW5kID0gcXVhbnRpbGUobG90X2FyZWEsIHByb2JzID0gMC45NzUpLA0KICAgIGNpX3dpZHRoID0gdXBwZXJfYm91bmQgLSBsb3dlcl9ib3VuZA0KICAgICkgDQoNCnN1bW1hcnlfc2FtcGxlX3llYXJfYnVpbHQNCmBgYA0KDQoNCg0KDQpgYGB7cn0NCnNhbXBsZV95ZWFyX2J1aWx0IDwtIGFtZXMgJT4lDQogIGZpbHRlcih5ZWFyX2J1aWx0IDwgMTkyMCkgJT4lIA0KICByZXBfc2FtcGxlX24oc2l6ZSA9IDEwMCwgcmVwcyA9IDIwMDApDQoNCnNhbXBsZV95ZWFyX2J1aWx0DQoNCnN1bW1hcnlfc2FtcGxlX3llYXJfYnVpbHQgPC0gc2FtcGxlX3llYXJfYnVpbHQgJT4lDQogIHVuZ3JvdXAoKSAlPiUNCiAgc3VtbWFyaXNlKA0KICAgIG1lYW5fbG90X2FyZWEgPSBtZWFuKGxvdF9hcmVhKSwNCiAgICBsb3dlcl9ib3VuZCA9IHF1YW50aWxlKGxvdF9hcmVhLCBwcm9icyA9IDAuMDI1KSwNCiAgICB1cHBlcl9ib3VuZCA9IHF1YW50aWxlKGxvdF9hcmVhLCBwcm9icyA9IDAuOTc1KSwNCiAgICBjaV93aWR0aCA9IHVwcGVyX2JvdW5kIC0gbG93ZXJfYm91bmQNCiAgICApIA0KDQpzdW1tYXJ5X3NhbXBsZV95ZWFyX2J1aWx0DQpgYGANCg0KDQoNCg0KDQoNCmBgYHtyfQ0Kc2FtcGxlX3llYXJfYnVpbHQgPC0gYW1lcyAlPiUNCiAgZmlsdGVyKHllYXJfYnVpbHQgPCAxOTIwKSAlPiUgDQogIHJlcF9zYW1wbGVfbihzaXplID0gMTAwLCByZXBzID0gNTAwMCkNCg0Kc2FtcGxlX3llYXJfYnVpbHQNCg0Kc3VtbWFyeV9zYW1wbGVfeWVhcl9idWlsdCA8LSBzYW1wbGVfeWVhcl9idWlsdCAlPiUNCiAgdW5ncm91cCgpICU+JQ0KICBzdW1tYXJpc2UoDQogICAgbWVhbl9sb3RfYXJlYSA9IG1lYW4obG90X2FyZWEpLA0KICAgIGxvd2VyX2JvdW5kID0gcXVhbnRpbGUobG90X2FyZWEsIHByb2JzID0gMC4wMjUpLA0KICAgIHVwcGVyX2JvdW5kID0gcXVhbnRpbGUobG90X2FyZWEsIHByb2JzID0gMC45NzUpLA0KICAgIGNpX3dpZHRoID0gdXBwZXJfYm91bmQgLSBsb3dlcl9ib3VuZA0KICAgICkgDQoNCnN1bW1hcnlfc2FtcGxlX3llYXJfYnVpbHQNCmBgYA0KDQpgYGB7cn0NCnNhbXBsZV95ZWFyX2J1aWx0IDwtIGFtZXMgJT4lDQogIGZpbHRlcih5ZWFyX2J1aWx0IDwgMTkyMCkgJT4lIA0KICByZXBfc2FtcGxlX24oc2l6ZSA9IDEwMCwgcmVwcyA9IDEwMDAwKQ0KDQpzYW1wbGVfeWVhcl9idWlsdA0KDQpzdW1tYXJ5X3NhbXBsZV95ZWFyX2J1aWx0IDwtIHNhbXBsZV95ZWFyX2J1aWx0ICU+JQ0KICB1bmdyb3VwKCkgJT4lDQogIHN1bW1hcmlzZSgNCiAgICBtZWFuX2xvdF9hcmVhID0gbWVhbihsb3RfYXJlYSksDQogICAgbG93ZXJfYm91bmQgPSBxdWFudGlsZShsb3RfYXJlYSwgcHJvYnMgPSAwLjAyNSksDQogICAgdXBwZXJfYm91bmQgPSBxdWFudGlsZShsb3RfYXJlYSwgcHJvYnMgPSAwLjk3NSksDQogICAgY2lfd2lkdGggPSB1cHBlcl9ib3VuZCAtIGxvd2VyX2JvdW5kDQogICAgKSANCg0Kc3VtbWFyeV9zYW1wbGVfeWVhcl9idWlsdA0KYGBgDQoNCg0KDQoNCg0KDQoNCmBgYHtyfQ0Kc2FtcGxlX3llYXJfYnVpbHQgPC0gYW1lcyAlPiUNCiAgZmlsdGVyKHllYXJfYnVpbHQgPCAxOTIwKSAlPiUgDQogIHJlcF9zYW1wbGVfbihzaXplID0gMTAwLCByZXBzID0gNTAwMDApDQoNCnNhbXBsZV95ZWFyX2J1aWx0DQoNCnN1bW1hcnlfc2FtcGxlX3llYXJfYnVpbHQgPC0gc2FtcGxlX3llYXJfYnVpbHQgJT4lDQogIHVuZ3JvdXAoKSAlPiUNCiAgc3VtbWFyaXNlKA0KICAgIG1lYW5fbG90X2FyZWEgPSBtZWFuKGxvdF9hcmVhKSwNCiAgICBsb3dlcl9ib3VuZCA9IHF1YW50aWxlKGxvdF9hcmVhLCBwcm9icyA9IDAuMDI1KSwNCiAgICB1cHBlcl9ib3VuZCA9IHF1YW50aWxlKGxvdF9hcmVhLCBwcm9icyA9IDAuOTc1KSwNCiAgICBjaV93aWR0aCA9IHVwcGVyX2JvdW5kIC0gbG93ZXJfYm91bmQNCiAgICApIA0KDQpzdW1tYXJ5X3NhbXBsZV95ZWFyX2J1aWx0DQpgYGANCg0KDQpGb3IgbW9yZSB0aGFuIDIwMCByZXBldGl0aW9ucywgdGhlcmUncyB2ZXJ5IGxpdHRsZSBjaGFuZ2UgaW4gdGhlIG1lYW5fbG90X2FyZWEgY2FsY3VsYXRpb24gYW5kIG5vIGNoYW5nZSBvYnNlcnZlZCBpbiB0aGUgQ0kgY2FsY3VsYXRpb24uIA0KDQoNCg0KDQoNCg0K